import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy_05_readbooknet.items import  Scrapy05ReadbooknetItem

class ReadbookSpider(CrawlSpider):
    """Crawl the paginated ``/book/1188_<n>.html`` listing pages on dushu.com.

    Every response reached through the pagination rule is handed to
    ``parse_item``, which yields one ``Scrapy05ReadbooknetItem`` for each
    ``<img>`` found inside the ``bookslist`` container.
    """

    name = "readbook"
    allowed_domains = ["www.dushu.com"]
    start_urls = ["https://www.dushu.com/book/1188_1.html"]

    rules = (
        Rule(
            # Collect the links in the pagination bar that point at the
            # other listing pages.
            LinkExtractor(allow=r"/book/1188_\d+\.html"),
            callback="parse_item",
            # With follow=False only the pagination bar of the current page
            # would be parsed; follow=True keeps extracting pagination links
            # from every page reached through this rule.
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Yield one item per image in the book list of ``response``.

        ``name`` comes from the image's ``alt`` attribute and ``src`` from
        its ``data-original`` attribute (presumably the lazy-loaded image
        URL -- confirm against the page markup). Either field is ``None``
        when the attribute is absent.
        """
        for node in response.xpath('//div[@class="bookslist"]/ul/li//img'):
            yield Scrapy05ReadbooknetItem(
                name=node.xpath('./@alt').get(),
                src=node.xpath('./@data-original').get(),
            )
